import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline
sns.set_style('whitegrid')
This is a basic data analysis of the COVID-19 vaccination drive that is under way around the world. The world has been facing a serious pandemic for many months, and many countries have successfully developed effective vaccines against it.
The data here is a record of the vaccination drive around the world. It starts on 2020-12-13 and captures data up to 2021-03-10.
# Load the vaccination records from the CSV file, then summarise the columns,
# their dtypes and their non-null counts.
df = pd.read_csv('country_vaccinations.csv')
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5824 entries, 0 to 5823 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 5824 non-null object 1 iso_code 5476 non-null object 2 date 5824 non-null object 3 total_vaccinations 3717 non-null float64 4 people_vaccinated 3302 non-null float64 5 people_fully_vaccinated 2257 non-null float64 6 daily_vaccinations_raw 3131 non-null float64 7 daily_vaccinations 5637 non-null float64 8 total_vaccinations_per_hundred 3717 non-null float64 9 people_vaccinated_per_hundred 3302 non-null float64 10 people_fully_vaccinated_per_hundred 2257 non-null float64 11 daily_vaccinations_per_million 5637 non-null float64 12 vaccines 5824 non-null object 13 source_name 5824 non-null object 14 source_website 5824 non-null object dtypes: float64(9), object(6) memory usage: 682.6+ KB
# Show the last five values of the date column.
df[['date']].tail()
| date | |
|---|---|
| 5819 | 2021-03-06 |
| 5820 | 2021-03-07 |
| 5821 | 2021-03-08 |
| 5822 | 2021-03-09 |
| 5823 | 2021-03-10 |
# Number of null (missing) values present in each column of the dataset
df.isna().sum()
country 0 iso_code 348 date 0 total_vaccinations 2107 people_vaccinated 2522 people_fully_vaccinated 3567 daily_vaccinations_raw 2693 daily_vaccinations 187 total_vaccinations_per_hundred 2107 people_vaccinated_per_hundred 2522 people_fully_vaccinated_per_hundred 3567 daily_vaccinations_per_million 187 vaccines 0 source_name 0 source_website 0 dtype: int64
This may be because not every country has started vaccinating, or because records are not being maintained properly. It is also possible that the data we are using is incomplete, or there may be other reasons. However, this is the data we have, and the analysis will be carried out on it.
## Cleaning plan for this cell and the ones that follow:
## - replace null values with 0, assuming that a null means no data is available
##   for that column or nothing has happened regarding that particular column
## - convert the vaccination count columns from float to integer type
## - convert date from object to datetime data type
## - add three new columns: year, month, day
# NOTE(review): fillna(0) is applied to every column, so the missing entries of
# the object-typed iso_code column are also replaced by the integer 0.
df.fillna(0, inplace=True)
df.head()
| country | iso_code | date | total_vaccinations | people_vaccinated | people_fully_vaccinated | daily_vaccinations_raw | daily_vaccinations | total_vaccinations_per_hundred | people_vaccinated_per_hundred | people_fully_vaccinated_per_hundred | daily_vaccinations_per_million | vaccines | source_name | source_website | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Albania | ALB | 2021-01-10 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.00 | 0.0 | 0.0 | Pfizer/BioNTech | Ministry of Health | https://shendetesia.gov.al/covid19-ministria-e... |
| 1 | Albania | ALB | 2021-01-11 | 0.0 | 0.0 | 0.0 | 0.0 | 64.0 | 0.00 | 0.00 | 0.0 | 22.0 | Pfizer/BioNTech | Ministry of Health | https://shendetesia.gov.al/covid19-ministria-e... |
| 2 | Albania | ALB | 2021-01-12 | 128.0 | 128.0 | 0.0 | 0.0 | 64.0 | 0.00 | 0.00 | 0.0 | 22.0 | Pfizer/BioNTech | Ministry of Health | https://shendetesia.gov.al/covid19-ministria-e... |
| 3 | Albania | ALB | 2021-01-13 | 188.0 | 188.0 | 0.0 | 60.0 | 63.0 | 0.01 | 0.01 | 0.0 | 22.0 | Pfizer/BioNTech | Ministry of Health | https://shendetesia.gov.al/covid19-ministria-e... |
| 4 | Albania | ALB | 2021-01-14 | 266.0 | 266.0 | 0.0 | 78.0 | 66.0 | 0.01 | 0.01 | 0.0 | 23.0 | Pfizer/BioNTech | Ministry of Health | https://shendetesia.gov.al/covid19-ministria-e... |
# Re-inspect the non-null counts and dtypes after the nulls have been filled.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5824 entries, 0 to 5823 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 5824 non-null object 1 iso_code 5824 non-null object 2 date 5824 non-null object 3 total_vaccinations 5824 non-null float64 4 people_vaccinated 5824 non-null float64 5 people_fully_vaccinated 5824 non-null float64 6 daily_vaccinations_raw 5824 non-null float64 7 daily_vaccinations 5824 non-null float64 8 total_vaccinations_per_hundred 5824 non-null float64 9 people_vaccinated_per_hundred 5824 non-null float64 10 people_fully_vaccinated_per_hundred 5824 non-null float64 11 daily_vaccinations_per_million 5824 non-null float64 12 vaccines 5824 non-null object 13 source_name 5824 non-null object 14 source_website 5824 non-null object dtypes: float64(9), object(6) memory usage: 682.6+ KB
# Cast the whole-number columns from float64 to int.
# The *_per_hundred columns are deliberately left as floats: they hold
# fractional values (e.g. 0.01), and casting them to int would truncate those
# fractions away and turn most early readings into 0.
# NOTE(review): daily_vaccinations_per_million is still cast, as before; this
# assumes the column only ever holds whole numbers — confirm against the data.
count_columns = [
    'total_vaccinations',
    'people_vaccinated',
    'people_fully_vaccinated',
    'daily_vaccinations_raw',
    'daily_vaccinations',
    'daily_vaccinations_per_million',
]
for column in count_columns:
    df[column] = df[column].astype(int)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5824 entries, 0 to 5823 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 5824 non-null object 1 iso_code 5824 non-null object 2 date 5824 non-null object 3 total_vaccinations 5824 non-null int64 4 people_vaccinated 5824 non-null int64 5 people_fully_vaccinated 5824 non-null int64 6 daily_vaccinations_raw 5824 non-null int64 7 daily_vaccinations 5824 non-null int64 8 total_vaccinations_per_hundred 5824 non-null int64 9 people_vaccinated_per_hundred 5824 non-null int64 10 people_fully_vaccinated_per_hundred 5824 non-null int64 11 daily_vaccinations_per_million 5824 non-null int64 12 vaccines 5824 non-null object 13 source_name 5824 non-null object 14 source_website 5824 non-null object dtypes: int64(9), object(6) memory usage: 682.6+ KB
# Break the 'YYYY-MM-DD' date strings into three string columns
# (0: year, 1: month, 2: day) and display the result.
date = df['date'].str.split(pat='-', expand=True)
date
| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | 2021 | 01 | 10 |
| 1 | 2021 | 01 | 11 |
| 2 | 2021 | 01 | 12 |
| 3 | 2021 | 01 | 13 |
| 4 | 2021 | 01 | 14 |
| ... | ... | ... | ... |
| 5819 | 2021 | 03 | 06 |
| 5820 | 2021 | 03 | 07 |
| 5821 | 2021 | 03 | 08 |
| 5822 | 2021 | 03 | 09 |
| 5823 | 2021 | 03 | 10 |
5824 rows × 3 columns
# Attach the split date parts as integer columns, then parse the original
# date strings into datetime values and re-check the dtypes.
df['year'] = date[0].astype(int)
df['month'] = date[1].astype(int)
df['day'] = pd.to_numeric(date[2])
df['date'] = pd.to_datetime(df['date'])
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5824 entries, 0 to 5823 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 5824 non-null object 1 iso_code 5824 non-null object 2 date 5824 non-null datetime64[ns] 3 total_vaccinations 5824 non-null int64 4 people_vaccinated 5824 non-null int64 5 people_fully_vaccinated 5824 non-null int64 6 daily_vaccinations_raw 5824 non-null int64 7 daily_vaccinations 5824 non-null int64 8 total_vaccinations_per_hundred 5824 non-null int64 9 people_vaccinated_per_hundred 5824 non-null int64 10 people_fully_vaccinated_per_hundred 5824 non-null int64 11 daily_vaccinations_per_million 5824 non-null int64 12 vaccines 5824 non-null object 13 source_name 5824 non-null object 14 source_website 5824 non-null object 15 year 5824 non-null int64 16 month 5824 non-null int64 17 day 5824 non-null int64 dtypes: datetime64[ns](1), int64(12), object(5) memory usage: 819.1+ KB
# Finally, some highlights of the dataset.
# The vaccine figure counts distinct values of the vaccines column; each value
# may be a comma-separated combination of several vaccines.
first_date, last_date = df['date'].min(), df['date'].max()
print('Number of countries participated in vaccinations: ', df['country'].unique().size)
print('Date at which the vaccination started', first_date)
print('Data points ends at the date: ', last_date)
print('Nunber of vaccines are in use: ', df['vaccines'].unique().size)
Number of countries participated in vaccinations: 130 Date at which the vaccination started 2020-12-13 00:00:00 Data points ends at the date: 2021-03-10 00:00:00 Nunber of vaccines are in use: 24
# Summary statistics, transposed so that each described column becomes a row.
df.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| total_vaccinations | 5824.0 | 1.336148e+06 | 5.840887e+06 | 0.0 | 0.0 | 26512.5 | 466527.50 | 95721290.0 |
| people_vaccinated | 5824.0 | 9.815566e+05 | 4.198054e+06 | 0.0 | 0.0 | 10690.0 | 294756.50 | 62451150.0 |
| people_fully_vaccinated | 5824.0 | 2.534459e+05 | 1.650972e+06 | 0.0 | 0.0 | 0.0 | 30394.75 | 32904161.0 |
| daily_vaccinations_raw | 5824.0 | 4.407231e+04 | 1.785066e+05 | 0.0 | 0.0 | 327.5 | 15335.75 | 2904229.0 |
| daily_vaccinations | 5824.0 | 5.634211e+04 | 1.850083e+05 | 0.0 | 1034.0 | 5921.0 | 27478.00 | 2169981.0 |
| total_vaccinations_per_hundred | 5824.0 | 5.057005e+00 | 1.286193e+01 | 0.0 | 0.0 | 0.0 | 4.00 | 133.0 |
| people_vaccinated_per_hundred | 5824.0 | 3.472184e+00 | 8.577710e+00 | 0.0 | 0.0 | 0.0 | 3.00 | 85.0 |
| people_fully_vaccinated_per_hundred | 5824.0 | 9.344093e-01 | 4.143049e+00 | 0.0 | 0.0 | 0.0 | 0.00 | 47.0 |
| daily_vaccinations_per_million | 5824.0 | 2.460897e+03 | 4.297707e+03 | 0.0 | 310.0 | 1127.0 | 2561.50 | 54264.0 |
| year | 5824.0 | 2.020943e+03 | 2.318730e-01 | 2020.0 | 2021.0 | 2021.0 | 2021.00 | 2021.0 |
| month | 5824.0 | 2.389423e+00 | 2.456588e+00 | 1.0 | 1.0 | 2.0 | 2.00 | 12.0 |
| day | 5824.0 | 1.503898e+01 | 9.010347e+00 | 1.0 | 7.0 | 15.0 | 23.00 | 31.0 |
# Distinct values of the country column, in order of first appearance.
df['country'].unique()
array(['Albania', 'Algeria', 'Andorra', 'Anguilla', 'Argentina',
'Australia', 'Austria', 'Azerbaijan', 'Bahrain', 'Bangladesh',
'Barbados', 'Belarus', 'Belgium', 'Belize', 'Bermuda', 'Bolivia',
'Brazil', 'Bulgaria', 'Cambodia', 'Canada', 'Cayman Islands',
'Chile', 'China', 'Colombia', 'Costa Rica', 'Croatia', 'Cyprus',
'Czechia', 'Denmark', 'Dominica', 'Dominican Republic', 'Ecuador',
'Egypt', 'El Salvador', 'England', 'Estonia', 'Faeroe Islands',
'Falkland Islands', 'Finland', 'France', 'Germany', 'Ghana',
'Gibraltar', 'Greece', 'Greenland', 'Grenada', 'Guatemala',
'Guernsey', 'Guyana', 'Honduras', 'Hong Kong', 'Hungary',
'Iceland', 'India', 'Indonesia', 'Iran', 'Ireland', 'Isle of Man',
'Israel', 'Italy', 'Japan', 'Jersey', 'Jordan', 'Kazakhstan',
'Kuwait', 'Latvia', 'Lebanon', 'Liechtenstein', 'Lithuania',
'Luxembourg', 'Macao', 'Malaysia', 'Maldives', 'Malta',
'Mauritius', 'Mexico', 'Moldova', 'Monaco', 'Mongolia',
'Montenegro', 'Montserrat', 'Morocco', 'Myanmar', 'Nepal',
'Netherlands', 'New Zealand', 'Northern Cyprus',
'Northern Ireland', 'Norway', 'Oman', 'Pakistan', 'Panama',
'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar',
'Romania', 'Russia', 'Rwanda', 'Saint Helena', 'Saint Lucia',
'San Marino', 'Saudi Arabia', 'Scotland', 'Senegal', 'Serbia',
'Seychelles', 'Singapore', 'Slovakia', 'Slovenia', 'South Africa',
'South Korea', 'Spain', 'Sri Lanka', 'Sweden', 'Switzerland',
'Thailand', 'Trinidad and Tobago', 'Turkey',
'Turks and Caicos Islands', 'Ukraine', 'United Arab Emirates',
'United Kingdom', 'United States', 'Uruguay', 'Venezuela', 'Wales',
'Zimbabwe'], dtype=object)
# Word cloud built from the country value of every row, joined with spaces.
wordCloud = WordCloud(max_font_size=50, background_color='white')
wordCloud.generate(' '.join(df['country']))
plt.figure(figsize=(15, 7))
plt.axis('off')
plt.imshow(wordCloud)
plt.show()
# Total of the daily vaccination figures for each country.
# A single grouped sum replaces the previous Python loop that rescanned every
# row once per country. sort=False keeps the countries in order of first
# appearance (the order the dict used to be built in), and rename_axis(None)
# keeps the index unnamed, as DataFrame.from_dict produced it.
country_wise_total_vaccinated_df = (
    df.groupby('country', sort=False)['daily_vaccinations']
    .sum()
    .rename_axis(None)
    .to_frame('total_vaccinated_till_date')
)
# The plain dict is still exposed under its original name, built before the
# sort so that it keeps first-appearance order.
country_wise_total_vaccinated = (
    country_wise_total_vaccinated_df['total_vaccinated_till_date'].to_dict()
)
country_wise_total_vaccinated_df.sort_values(
    by='total_vaccinated_till_date', ascending=False, inplace=True
)
country_wise_total_vaccinated_df
| total_vaccinated_till_date | |
|---|---|
| United States | 89468175 |
| China | 49687760 |
| United Kingdom | 23297315 |
| India | 22256015 |
| England | 19639663 |
| ... | ... |
| Montserrat | 645 |
| Trinidad and Tobago | 441 |
| Grenada | 145 |
| Honduras | 0 |
| Saint Helena | 0 |
130 rows × 1 columns
# Bar chart of the per-country totals, coloured by the total itself.
# The colour column is numeric, so plotly draws a continuous colour axis:
# the palette has to be passed as color_continuous_scale (a discrete sequence
# is not used for numeric colours), and the caption belongs to the colour bar
# rather than to a legend.
fig = px.bar(
    country_wise_total_vaccinated_df,
    x=country_wise_total_vaccinated_df.index,
    y='total_vaccinated_till_date',
    color='total_vaccinated_till_date',
    color_continuous_scale=px.colors.sequential.Viridis_r,
)
fig.update_layout(
    title={
        'text': 'Vaccination till date in various countries',
        'y': 0.95,
        'x': 0.5,
    },
    xaxis_title='Countries',
    yaxis_title='Total Vaccinated',
    coloraxis_colorbar=dict(title=dict(text='Total Vaccinated')),
)
fig.show()
# Country-wise daily vaccinations: one line per country over time.
fig = px.line(data_frame=df, x='date', y='daily_vaccinations', color='country')
fig.update_layout(
    title=dict(text='Daily Vaccination trend', x=0.5, y=0.95),
    yaxis_title='Daily Vaccinations',
    xaxis_title='Date',
)
fig.show()
def plot_till_date(value1, value2, title, color1, color2):
    """Plot the per-date totals of two columns of ``df`` as stacked traces.

    For every distinct date in the notebook-level ``df`` the values of
    ``value1`` and ``value2`` are summed over all rows with that date. The
    first five rows of the resulting table (sorted by date) are displayed and
    the two series are drawn as stacked scatter traces.

    Args:
        value1: Name of the first column of ``df`` to total per date.
        value2: Name of the second column of ``df`` to total per date.
        title: Text of the figure title.
        color1: Marker colour of the ``value1`` trace.
        color2: Marker colour of the ``value2`` trace.

    Returns:
        The result of ``plot.show()``.
    """
    # One grouped sum replaces the previous loop that rescanned every row once
    # per date. sort=False keeps the dates in order of first appearance so the
    # row labels shown by display() stay the same as before; the date column
    # is named 'index', as reset_index() used to name it.
    so_far_df = (
        df.groupby('date', sort=False)[[value1, value2]]
        .sum()
        .rename_axis('index')
        .reset_index()
    )
    so_far_df.sort_values(by='index', inplace=True)

    # Both traces share one stackgroup, so the second is stacked on the first.
    traces = [
        go.Scatter(
            x=so_far_df['index'],
            y=so_far_df[column],
            stackgroup='one',
            name=column,
            marker_color=colour,
        )
        for column, colour in ((value1, color1), (value2, color2))
    ]
    plot = go.Figure(data=traces)
    plot.update_layout(
        title={
            'text': title,
            'y': 0.95,
            'x': 0.5,
        },
        xaxis_title='Date',
    )
    display(so_far_df.head(5))
    return plot.show()
# Per-date totals of fully vaccinated vs vaccinated people, summed over all rows of each date.
plot_till_date('people_fully_vaccinated','people_vaccinated','People vaccinated vs fully vaccinated till date',
'#c4eb28','#35eb28')
| index | people_fully_vaccinated | people_vaccinated | |
|---|---|---|---|
| 87 | 2020-12-13 | 0 | 172930 |
| 78 | 2020-12-14 | 0 | 0 |
| 79 | 2020-12-15 | 0 | 28500 |
| 80 | 2020-12-16 | 0 | 0 |
| 81 | 2020-12-17 | 0 | 0 |
# Same chart for the per-hundred columns.
# NOTE(review): plot_till_date adds the values of all rows sharing a date, so
# these are sums of per-country per-hundred figures, not a worldwide rate.
plot_till_date('people_fully_vaccinated_per_hundred','people_vaccinated_per_hundred',
'People vaccinated vs fully vaccinated per hundred till date','#0938e3','#7127cc')
| index | people_fully_vaccinated_per_hundred | people_vaccinated_per_hundred | |
|---|---|---|---|
| 87 | 2020-12-13 | 0 | 0 |
| 78 | 2020-12-14 | 0 | 0 |
| 79 | 2020-12-15 | 0 | 0 |
| 80 | 2020-12-16 | 0 | 0 |
| 81 | 2020-12-17 | 0 | 0 |
# Pie Plot
def plot_pie(value, title, color):
    """Show a pie chart of how many rows of ``df`` carry each value of a column.

    The row count per distinct value is printed as a dict and then plotted.

    Args:
        value: Name of the column of the notebook-level ``df`` to count.
        title: Text of the figure title.
        color: ``'plasma'`` or ``'rainbow'`` selects that plotly sequential
            palette; any other value keeps plotly's default colours.

    Returns:
        The result of ``fig.show()``.
    """
    # One grouped count replaces the previous loop that rescanned every row
    # once per distinct value. sort=False keeps first-appearance order.
    # Missing values form no group, so they get no (zero-sized) slice.
    counts = df.groupby(value, sort=False).size()
    # Plain ints keep the printed dict free of numpy scalar wrappers.
    new_dict = {key: int(count) for key, count in counts.items()}
    print(new_dict)
    new_df = pd.DataFrame.from_dict(new_dict, orient='index', columns=['Total'])

    if color == 'plasma':
        palette = px.colors.sequential.Plasma
    elif color == 'rainbow':
        palette = px.colors.sequential.Rainbow
    else:
        # None leaves plotly's default discrete colours in place.
        palette = None

    fig = px.pie(
        new_df,
        values='Total',
        names=new_df.index,
        title=title,
        color_discrete_sequence=palette,
    )
    fig.update_layout(
        title={
            'x': 0.5,
            'y': 0.95,
        },
        legend_title=value,
    )
    return fig.show()
# Distinct values of the vaccines column, in order of first appearance.
df['vaccines'].unique()
array(['Pfizer/BioNTech', 'Sputnik V', 'Oxford/AstraZeneca',
'Oxford/AstraZeneca, Sinopharm/Beijing, Sputnik V',
'Oxford/AstraZeneca, Pfizer/BioNTech',
'Moderna, Oxford/AstraZeneca, Pfizer/BioNTech', 'Sinovac',
'Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V',
'Oxford/AstraZeneca, Sinovac', 'Sinopharm/Beijing',
'Moderna, Pfizer/BioNTech', 'Pfizer/BioNTech, Sinovac',
'Sinopharm/Beijing, Sinopharm/Wuhan, Sinovac', 'Moderna',
'Moderna, Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V',
'Covaxin, Oxford/AstraZeneca',
'Pfizer/BioNTech, Sinopharm/Beijing',
'Oxford/AstraZeneca, Pfizer/BioNTech, Sputnik V',
'Sinopharm/Beijing, Sputnik V',
'Oxford/AstraZeneca, Sinopharm/Beijing', 'EpiVacCorona, Sputnik V',
'Johnson&Johnson',
'Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sinopharm/Wuhan, Sputnik V',
'Johnson&Johnson, Moderna, Pfizer/BioNTech'], dtype=object)
# Share of rows per vaccines value (each value may be a combination of vaccines).
plot_pie('vaccines','Various vaccines and their uses','plasma')
{'Pfizer/BioNTech': 1095, 'Sputnik V': 197, 'Oxford/AstraZeneca': 400, 'Oxford/AstraZeneca, Sinopharm/Beijing, Sputnik V': 92, 'Oxford/AstraZeneca, Pfizer/BioNTech': 842, 'Moderna, Oxford/AstraZeneca, Pfizer/BioNTech': 1508, 'Sinovac': 193, 'Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V': 140, 'Oxford/AstraZeneca, Sinovac': 54, 'Sinopharm/Beijing': 114, 'Moderna, Pfizer/BioNTech': 358, 'Pfizer/BioNTech, Sinovac': 126, 'Sinopharm/Beijing, Sinopharm/Wuhan, Sinovac': 76, 'Moderna': 6, 'Moderna, Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V': 73, 'Covaxin, Oxford/AstraZeneca': 55, 'Pfizer/BioNTech, Sinopharm/Beijing': 45, 'Oxford/AstraZeneca, Pfizer/BioNTech, Sputnik V': 77, 'Sinopharm/Beijing, Sputnik V': 17, 'Oxford/AstraZeneca, Sinopharm/Beijing': 102, 'EpiVacCorona, Sputnik V': 86, 'Johnson&Johnson': 23, 'Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sinopharm/Wuhan, Sputnik V': 64, 'Johnson&Johnson, Moderna, Pfizer/BioNTech': 81}
# Share of rows per reporting source.
plot_pie('source_name','Sources','plasma')
{'Ministry of Health': 1791, 'Government of Andorra': 33, 'Government of Australia via covidlive.com.au': 24, 'Government of Azerbaijan': 53, 'Directorate General of Health Services': 43, 'Sciensano': 72, 'Government of Bermuda': 53, 'Regional governments via Coronavirus Brasil': 54, 'Official data from provinces via covid19tracker.ca': 87, 'Cayman Islands Government': 70, 'Department of Statistics and Health Information': 77, 'National Health Commission': 148, 'Government of Cyprus': 59, 'Statens Serum Institut': 74, 'Government of Dominica': 19, 'Ministry of Public Health': 24, 'Government of Ecuador via Ecuacovid': 48, 'Government of the United Kingdom': 435, 'National Health Board': 73, 'Government of the Faeroe Islands': 39, 'Government of the Falkland Islands': 16, 'Finnish Institute for Health and Welfare': 70, 'Public Health France': 73, 'Robert Koch Institut': 74, 'Government of Ghana': 10, 'Government of Gibraltar': 60, 'Government of Greenland': 34, 'Government of Grenada': 6, 'Government of Guernsey': 38, 'Government of Honduras': 1, 'Government of Hong Kong': 18, 'Government of Hungary': 73, 'Directorate of Health': 69, 'Government of Iran': 10, 'Heath Service Executive': 67, 'Isle of Man Government': 49, 'Government of Israel': 82, 'Extraordinary commissioner for the Covid-19 emergency': 74, 'Ministry of Health, Labour and Welfare': 22, 'Government of Jersey': 36, 'Government of Jordan': 45, 'Government of Kazakhstan': 25, 'National Health Service': 69, 'Government of Lebanon': 26, 'Federal Office of Public Health': 76, 'Government of Luxembourg': 70, 'Government of Macao': 11, 'Government of Malaysia': 15, 'Presidency of the Maldives': 37, 'COVID-19 Malta Public Health Response Team': 52, 'National Communication Committee on COVID-19': 24, 'Secretary of Health': 77, 'National Council': 20, 'Government of Montenegro': 17, 'Government of Montserrat': 16, 'Government of Nepal': 26, 'National Institute for Public Health and the Environment': 50, 
'Norwegian Institute of Public Health': 73, 'National Command and Operation Centre': 20, 'Government of Paraguay': 18, 'Government of the Philippines': 9, 'General Directorate of Health via Data Science for Social Good': 74, 'National Strategic Group on COVID-19': 76, 'Government of Romania': 73, 'Official data from local governments via gogov.ru': 86, 'Government of Saint Helena': 1, 'Social Security Institute': 11, 'Saudi Health Council': 64, 'Government of Serbia': 62, 'Extended Programme for Immunisation': 60, 'National Institute of Public Health, via Sledilnik': 73, 'Korea Centers for Disease Control and Prevention': 14, 'Public Health Agency of Sweden': 74, 'Government of Thailand': 4, 'COVID-19 Vaccine Information Platform': 57, 'National Emergency Crisis and Disaster Management Authority': 64, 'Centers for Disease Control and Prevention': 81, 'Government of Venezuela': 16}
# Word cloud built from the vaccines value of every row, joined with spaces.
wordCloud = WordCloud(max_font_size=50, background_color='white')
wordCloud.generate(' '.join(df['vaccines']))
plt.figure(figsize=(12, 5))
plt.axis('off')
plt.imshow(wordCloud)
plt.show()
# Daily vaccinations per million: one line per country over time.
fig = px.line(data_frame=df, x='date', y='daily_vaccinations_per_million', color='country')
fig.update_layout(
    title=dict(text='Daily vaccination trend per million', x=0.5, y=0.95),
    yaxis_title='Daily Vaccinations per million',
    xaxis_title='Date',
)
fig.show()
# Compare the total_vaccinations series of India and the United States.
india_usa = [df.loc[df['country'] == name] for name in ('India', 'United States')]
result = pd.concat(india_usa)
fig = px.line(data_frame=result, x='date', y='total_vaccinations', color='country')
fig.update_layout(
    title=dict(text='Total Vaccinated-India vs United States', x=0.5, y=0.95),
    yaxis_title='Total Vaccinations',
    xaxis_title='Date',
)
fig.show()
def plot_map(variable, title, color):
    """Show a world choropleth of the latest value of a column per country.

    For every country in the notebook-level ``df``, the value of ``variable``
    in that country's last row (in ``df`` row order) is plotted.

    Args:
        variable: Name of the column of ``df`` to colour the map by.
        title: Text of the figure title.
        color: Continuous colour scale passed to plotly, or ``None`` to keep
            plotly's default.

    Returns:
        The result of ``place_map.show()``.
    """
    # One grouped pass replaces the previous loop that rescanned every row once
    # per country. sort=False keeps first-appearance order and
    # rename_axis(None) keeps the index unnamed, as from_dict produced it.
    # NOTE(review): last() skips missing values; df has its nulls filled with 0
    # earlier in the notebook, so a country whose final row was originally
    # empty is mapped as 0.
    new_df = (
        df.groupby('country', sort=False)[variable]
        .last()
        .rename_axis(None)
        .to_frame()
    )
    # color_continuous_scale=None is plotly's default, so a single call covers
    # both the "no colour given" and the "colour given" case.
    place_map = px.choropleth(
        new_df,
        locations=new_df.index,
        locationmode='country names',
        color=variable,
        hover_name=new_df.index,
        color_continuous_scale=color,
    )
    place_map.update_layout(
        title_text=title,
        title_x=0.5,
        geo=dict(
            showocean=True, oceancolor='#7af8ff',
            showland=True, landcolor='white',
            showframe=False,
        ),
    )
    return place_map.show()
# World maps coloured by each country's last recorded value of the given column.
plot_map('total_vaccinations','Most Vaccinated country', None)
plot_map('vaccines','Vaccines Used in Different countries', None)
plot_map('people_fully_vaccinated','People fully vaccinated in Different countries','haline')
def plot_animation(value, title, color):
    """Show an animated world choropleth of a column, one frame per day number.

    Only the rows of the notebook-level ``df`` whose ``year`` is 2021 are used.

    Args:
        value: Name of the column of ``df`` to colour the map by.
        title: Text of the figure title.
        color: Continuous colour scale passed to plotly, or ``None`` to keep
            plotly's default.

    Returns:
        The result of ``place_map.show()``.
    """
    # Filter and sort in one chained expression: sort_values returns a new
    # frame, so nothing is written in place to a slice of df (the in-place
    # sort is what triggered the SettingWithCopyWarning).
    # NOTE(review): frames are keyed on the day-of-month column only, so when
    # df holds several months of 2021 each frame mixes the same day number
    # from different months.
    new_df = df[df.year == 2021].sort_values(by='day')
    # color_continuous_scale=None is plotly's default, so a single call covers
    # both the "no colour given" and the "colour given" case.
    place_map = px.choropleth(
        new_df,
        locations='country',
        locationmode='country names',
        color=value,
        hover_name='country',
        animation_frame='day',
        color_continuous_scale=color,
    )
    place_map.update_layout(
        title_text=title,
        title_x=0.5,
        geo=dict(
            showocean=True, oceancolor='#7af8ff',
            showland=True, landcolor='white',
            showframe=False,
        ),
    )
    return place_map.show()
# Animated map of daily vaccinations.
# NOTE(review): the title says January 2021, but plot_animation keeps every row
# with year == 2021, not only January.
plot_animation('daily_vaccinations','Animation of daily vaccinations through January 2021 in different countries',None)
<ipython-input-39-1001565589c3>:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Animated map of daily vaccinations per million.
# NOTE(review): the title says january 2021, but plot_animation keeps every row
# with year == 2021, not only January.
plot_animation('daily_vaccinations_per_million','Animation of daily Vaccinations per million through january 2021 in different countries','Plotly3')
<ipython-input-39-1001565589c3>:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# 3D scatter of the first 500 rows: date vs people vaccinated vs people fully
# vaccinated, with colour and symbol per country and marker size taken from
# the fully vaccinated count.
fig = px.scatter_3d(
    data_frame=df.head(500),
    x='date',
    y='people_vaccinated',
    z='people_fully_vaccinated',
    color='country',
    symbol='country',
    size='people_fully_vaccinated',
    hover_data=['country'],
    opacity=0.9,
)
fig.update_layout(
    title=dict(
        text='Date vs People vaccinated vs People Fully Vaccinated|3D',
        x=0.5,
        y=0.95,
    )
)
fig.show()
def size(m, n):
    """Resize the current matplotlib figure to ``m`` by ``n`` inches."""
    plt.gcf().set_size_inches(m, n)
# Correlation heatmap of the numeric columns.
# Only numeric columns are correlated: df still holds string columns (country,
# vaccines, ...), and DataFrame.corr() on such a frame raises in pandas >= 2.0
# instead of silently dropping them.
sns.heatmap(df.select_dtypes(include='number').corr())
plt.title('Corelation Heatmap')
plt.yticks(rotation=0)
size(10, 7)